Chapter 6 Feature Analysis
The code for the analysis and occassional brief commentary is included below. The process of the exploration is expanded on in our verbal project report.
#READING IN THE DATA
#MTG Data with 2019 Market
mtg <- read_csv("cleanData_New.csv", col_names = TRUE)
#MTG Data with 2019 Market for use in text
cmtg <- mtg[,-1]
#MTG Data with 2020 Market
marketDf <- read_csv("mtgMarketInfo.csv", col_names = TRUE)# TOKENIZING SUBTYPES
mtg$subtypes <- str_split(mtg$subtypes, ",")
# TOKENIZING KEYWORDS
mtg$keywords <- str_split(mtg$keywords, ",")
# TURNING RARITY INTO A FACTOR
mtg$rarity <- factor(mtg$rarity, levels = c("common", "uncommon", "rare", "mythic"), ordered = TRUE)
#FORMATTING POWER AND TOUGHNESS CORRECTLY
# forces some to numeric, however upon investigation the cards turned to NA's are 'booster' cards, which are like spell cards
# these cards can be identified by their key words
mtg$power <- as.numeric(mtg$power)
mtg$toughness <- as.numeric(mtg$toughness)6.1 Prices
marketDf$Market.Price <- as.numeric(gsub("\\$", "", marketDf$`Market Price`))
marketDf$Listed.Median <- as.numeric(gsub("\\$", "", marketDf$`Listed Median`))
marketDf$Card.Name <- marketDf$`Card Name`
marketDf$Set.Name <- marketDf$`Set Name`
mergedDf <- dplyr::left_join(marketDf, mtg, by=c("Card.Name" = "name"))
mergedDf$releaseDate <- as.Date(mergedDf$releaseDate)# Group by both rarity and card "type"
# Averaged market prices of cards in each of these groups
typeDf <- aggregate( Market.Price ~ rarity+type, mergedDf, mean )
# Histogram of average market price by card type (i.e. land, creature, etc.)
fig <- typeDf %>%
plot_ly(
type='histogram',
nbinsx = 40,
x=~Market.Price,
bingroup=1, color = ~rarity) %>%
layout(title = 'Average Market Price by Card Type',
xaxis = list(title = 'Avg Market Price [USD]',range = c(-3, 65)))
fig# Boxplot average market price by card type (i.e. land, creature, etc.)
typeDf %>%
plot_ly() %>%
add_trace(x = ~as.numeric(rarity),y = ~Market.Price, color = ~rarity, type = "box",
hoverinfo = 'name+y') %>%
add_markers(x = ~jitter(as.numeric(rarity)), y = ~Market.Price, color = ~rarity,
marker = list(size = 6),
hoverinfo = "text",
text = ~paste0("Type: ",type,
"<br>Rarity: ",rarity,
"<br>Avg Price: ",round(Market.Price,2)),
showlegend = FALSE) %>%
layout(legend = list(orientation = "h",
x =0.6, xanchor = "center",
y = 1, yanchor = "bottom"
),
xaxis = list(title = "Rarity",
showticklabels = FALSE),
yaxis = list(title = "Avg Market Price [USD]",
showticklabels = FALSE),
title = list(text = 'Price by Card Type',
x = 0.08))setDf <- aggregate( Market.Price ~ rarity+Set.Name, mergedDf, mean )
# Histogram of average market price by card set (i.e. Alpha Edition, Arabian Nights, etc.)
fig <- setDf %>%
plot_ly(
type='histogram',
nbinsx = 30,
x=~Market.Price,
bingroup=1, color = ~rarity) %>%
layout(title = 'Average Market Price by Card Set',
xaxis = list(title = 'Avg Market Price [USD]',range = c(-10, 140)))
fig# Boxplot average market price by card set (i.e. Beta Edition, Alpha Edition, etc.)
setDf %>%
plot_ly() %>%
add_trace(x = ~as.numeric(rarity),y = ~Market.Price, color = ~rarity, type = "box",
hoverinfo = 'name+y') %>%
add_markers(x = ~jitter(as.numeric(rarity)), y = ~Market.Price, color = ~rarity,
marker = list(size = 6),
hoverinfo = "text",
text = ~paste0("Set: ",Set.Name,
"<br>Rarity: ",rarity,
"<br>Avg Price: ",round(Market.Price,2)),
showlegend = FALSE) %>%
layout(legend = list(orientation = "h",
x =0.6, xanchor = "center",
y = 1, yanchor = "bottom"
),
xaxis = list(title = "Rarity",
showticklabels = FALSE),
yaxis = list(title = "Avg Market Price [USD]",
showticklabels = FALSE),
title = list(text = 'Price by Card Set',
x = 0.08))# Create new attribute of summed power and toughness
mergedDf$power.toughness <- mergedDf$power + mergedDf$toughness
# The next four plots (arranged with ggarrange) investigate the relationship
# of market price vs toughness, mana cost, and power
power.scatter <- mergedDf %>%
ggplot(.,aes(y = Market.Price, x = power, color = rarity)) +
geom_point()+ylab('Market Price')+xlab('Power')+ylim(0,610)+
ggtitle('Price v. Power')+theme(plot.title = element_text(hjust = 0.5))+theme(legend.position="none")+theme(panel.background = element_blank())
toughness.scatter <- mergedDf %>%
ggplot(.,aes(y = Market.Price, x = toughness, color = rarity)) +
geom_point()+ylab('Market Price')+xlab('Toughness')+ylim(0,610)+
ggtitle('Price v. Toughness')+theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position="none")+theme(panel.background = element_blank())
mana.scatter <- subset(mergedDf, mergedDf$convertedManaCost != max(mergedDf$convertedManaCost, na.rm=T)) %>%
ggplot(.,aes(y = Market.Price, x = convertedManaCost, color = rarity)) +
geom_point()+ylab('Market Price')+xlab('Mana Cost')+ylim(0,610)+
ggtitle('Price v. Mana Cost')+theme(plot.title = element_text(hjust = 0.5)) + theme(legend.position="none")+theme(panel.background = element_blank())
net.scatter <- mergedDf %>%
ggplot(.,aes(y = Market.Price, x = power.toughness, color = rarity)) +
geom_point()+ylab('Market Price')+xlab('Power + Toughness')+ylim(0,610)+
ggtitle('Price v. Power + Toughness')+theme(plot.title = element_text(hjust = 0.5))+theme(panel.background = element_blank())
ggarrange(power.scatter, toughness.scatter, mana.scatter, net.scatter)
# 3D scatter plot of power, toughness, mana cost (w/ color mapped to rarity)
# Shows linear relationship between power, toughness, and mana
fig <- plot_ly(mergedDf, x = ~power, y = ~toughness, z = ~convertedManaCost, color = ~rarity)
fig <- fig %>% add_markers(hoverinfo = "text",
text = ~paste0("Power: ",power,
"<br>Toughness: ",toughness,
"<br>Mana Cost: ",convertedManaCost
))
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Power'),
yaxis = list(title = 'Toughness'),
zaxis = list(title = 'Mana Cost')),
title = list(text = 'Mana Cost v Power/Toughness'))
fig6.2 Artist Influence
# Group by both rarity and card artist
# Averaged market prices of cards in each of these groups
artistDf <- aggregate( Market.Price ~ rarity+artist, mergedDf, mean )
# Histogram of artists' average card selling price
fig <- artistDf %>%
plot_ly(
type='histogram',
nbinsx = 30,
x=~Market.Price,
bingroup=1, color = ~rarity) %>%
layout(title = 'Average Market Price by Artist',
xaxis = list(title = 'Avg Market Price [USD]',range = c(-10, 135)))
fig# Boxplot of artists' selling prices
artistDf %>%
plot_ly() %>%
add_trace(x = ~as.numeric(rarity),y = ~Market.Price, color = ~rarity, type = "box",
hoverinfo = 'name+y') %>%
add_markers(x = ~jitter(as.numeric(rarity)), y = ~Market.Price, color = ~rarity,
marker = list(size = 6),
hoverinfo = "text",
text = ~paste0("<br>Rarity: ",rarity,
"<br>Avg Price: ",round(Market.Price,2)),
showlegend = FALSE) %>%
layout(legend = list(orientation = "h",
x =0.6, xanchor = "center",
y = 1, yanchor = "bottom"
),
xaxis = list(title = "Rarity",
showticklabels = FALSE),
yaxis = list(title = "Avg Market Price [USD]",
showticklabels = FALSE),
title = list(text = 'Price by Card Artist',
x = 0.08))6.2.1 Hall of Fame
The following artist have made art for cards that sold for over $1000.
# artist vs price
cards[,c("artist","mtgo","mtgoFoil","paper","paperFoil")] %>%
gather(mtgo,mtgoFoil,paper,paperFoil,
key='paperType',
value='price',
na.rm = T) %>%
filter(!is.na(artist) & price >= 1000) %>%
ggplot(aes(x = artist, y = price, fill=I('blueviolet'))) +
geom_col(position='dodge') +
theme(axis.text.x = element_text(angle = 90))+
geom_hline(aes(yintercept=1000),linetype=2) +
geom_text(aes(1,1000,label = 1000, vjust = -1)) +
labs(title = 'Artists', caption='Greater than $1000') +
xlab('Artist') +
ylab('Price (USD)')
6.3 Game Mechanics
#the card data read from csv, stored as dataframe
mtg <- read.csv('cleanData_New.csv', header = T)
mtg <- as.data.frame(mtg)
#converting string data to numeric
mtg[,'power'] <- as.numeric(mtg[,'power'])## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
#filtering the data to remove NA's and outliers, aggregating toughness/power into "stats"
mtg.0 <- mtg %>%
replace_na(list(toughness = 0, power = 0)) %>%
mutate(stats = power + toughness) %>%
filter(convertedManaCost < 100)
#fitting linear regression line
fit <- lm(convertedManaCost ~ stats, data=mtg.0)
#creating plot
fig <- plot_ly(data = mtg.0) %>%
add_markers(x = ~stats, y = ~convertedManaCost) %>%
add_lines(x = ~stats, y = fitted(fit)) %>%
layout(showlegend = F) %>%
layout(xaxis = list(title = "Card Stats"), yaxis = list(title = "Mana Cost")) %>%
layout(title="Creature Stats vs Mana Cost Regression")
fig#subsetting the data for creature cards
# removing NA's
#gouping by creature subtype and summarising
mtg.1 <- mtg %>%
filter(type == 'Creature') %>%
replace_na(list(toughness = 0, power = 0)) %>%
mutate(stats = power + toughness) %>%
select(stats, convertedManaCost, subtypes) %>%
group_by(subtypes) %>%
summarise_at(vars(stats:convertedManaCost), mean, na.rm = TRUE)
# Initiating the interactive plot
fig1 <- plot_ly(data = mtg.1, x = ~stats, y = ~convertedManaCost, color = ~subtypes)
# Adding title to the axis legend
fig1 <- fig1 %>%
layout(xaxis = list(title = "Mean Creature Stats"), yaxis = list(title = "Mean Mana Cost"))
# Adding title to plot, creating interactive markers for each point
fig1 <- fig1 %>% layout(showlegend = FALSE,
title='Mean Mana Cost and Stats by Subtype') %>%
add_markers(hoverinfo = 'text',
text = ~paste('</br> Subtype: ', subtypes,
'</br> Creature Stats: ', stats,
'</br> Mean Mana Cost: ', convertedManaCost))
fig1# Subsetting the data to show only creatures
# Then groups by subtypes and total stats
# Last, summarises the mean mana cost for each group
mtg.2 <- mtg %>%
filter(type == 'Creature') %>%
replace_na(list(toughness = 0, power = 0)) %>%
mutate(stats = power + toughness) %>%
select(stats, convertedManaCost, subtypes) %>%
group_by(subtypes, stats) %>%
summarise(meanManaCost = mean(convertedManaCost))
# Initiates the interactive plot
fig2 <- plot_ly(data = mtg.2, x = ~stats, y = ~meanManaCost, color = ~subtypes)
# Adds title to the axes legend
fig2 <- fig2 %>%
layout(xaxis = list(title = "Creature Stats"), yaxis = list(title = "Mean Mana Cost"))
# Adss title to plot, then creates interactive markers for each point
fig2 <- fig2 %>% layout(showlegend = FALSE,
title='Mean Mana Cost by Creature Stats and Subtype') %>%
add_markers(hoverinfo = 'text',
text = ~paste('</br> Subtype: ', subtypes,
'</br> Creature Stats: ', stats,
'</br> Mean Mana Cost: ', meanManaCost))
fig2# reading the market data csv
marketDf <- read.csv('mtgMarketInfo.csv', header=TRUE)
#removing the symbols and converting to numeric
marketDf$Market.Price = as.numeric(gsub("\\$", "", marketDf$Market.Price))
marketDf$Listed.Median = as.numeric(gsub("\\$", "", marketDf$Listed.Median))
#stripping whitespace from strings
marketDf$Rarity = gsub(" ", "", marketDf$Rarity, fixed = TRUE)
#filtering data to remove outliers
marketDf1 <- marketDf %>%
filter(Listed.Median <= 50, Market.Price <= 50)
#plotting the data in ggplot
p <- ggplot(
marketDf1,
aes(x = Listed.Median, y = Market.Price, color = Rarity)
) +
geom_point(show.legend = FALSE, alpha =0.7) +
scale_color_viridis_d() +
scale_size(range = c(1, 12)) +
scale_x_log10() +
labs(x = "Listed Median Price", y="Market Price") +
ggtitle('Market Price vs Listed Median Price by Set')
#printing the static ggplot
p
#adding an animated transition for the plot in gganimate
a <- p + transition_states(Set.Name) +
labs(title = "Set: {closest_state}")
#creating parameters and rendering the animation as GIF with gifski
animate(a, fps = 3, width = 750, height = 450, renderer=gifski_renderer())
#subsetting the data and summarizing by grouped set and rarity
marketDf2 <- marketDf %>%
select(Set.Name, Rarity, Listed.Median) %>%
filter(Rarity=="C" | Rarity=="U" | Rarity=="R" | Rarity=="M") %>%
na.omit() %>%
group_by(Set.Name,Rarity) %>%
summarise(mean.listed = mean(Listed.Median)) %>%
filter(mean.listed <= 40)
#creating bar chart in ggplot
p<-ggplot(data=marketDf2, aes(x=Rarity, y=mean.listed, fill=Rarity)) +
geom_bar(stat="identity") +
scale_fill_hue(c=45, l=80) +
labs(title = 'Mean Card Value by Rarity and Set',
y = "Mean Listed Price")
p
#converting plot into animated timelapse(by set)
a <- p + transition_states(Set.Name) +
labs(title= "Set: {closest_state}")
#rendering plot as a GIF
animate(a, fps = 3, width = 750, height = 450, renderer=gifski_renderer())